import os
import re
import cv2
import pytesseract


def extract_text(image_path: str, mode: int = 0) -> str:
    """Run OCR on an image file and return the recognised text.

    The image is converted to grayscale, binarised with Otsu's threshold
    and then passed to Tesseract. As a side effect, the binarised image is
    written to ``<name>_binary.png`` in the current working directory,
    where ``<name>`` is the input file's base name without its extension.

    Args:
        image_path: Path of the image file to read.
        mode: ``0`` (the default) recognises simplified Chinese
            (``chi_sim``), removes every space and collapses runs of
            newlines into a single newline. Any other value recognises
            English (``eng``) and returns Tesseract's output unchanged.

    Returns:
        The recognised text.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    # cv2.imread does not raise on a bad path; it returns None, which would
    # only surface later as an opaque OpenCV assertion inside cvtColor.
    # Fail early with an explicit error instead.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Read the image.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Unable to decode image file: {image_path}")

    # Convert the image to grayscale.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Binarise; with THRESH_OTSU the threshold argument (0) is ignored and
    # the threshold is chosen automatically.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Save the binarised image for later processing/inspection.
    file_name = os.path.splitext(os.path.basename(image_path))[0]
    cv2.imwrite(f"{file_name}_binary.png", binary)

    # Recognise the text with Tesseract.
    if mode == 0:
        extracted_text = pytesseract.image_to_string(binary, lang='chi_sim').replace(' ', '')
        return re.sub(r'\n+', '\n', extracted_text)

    return pytesseract.image_to_string(binary, lang='eng')


if __name__ == '__main__':
    # Script entry point: OCR the image at ./zh_text.png using the default
    # mode of extract_text and print the recognised text to stdout.
    print(extract_text("./zh_text.png"))

